Short vs. long RNA-seq (differentiation over timecourse)
Load packages
Load data
bambu_0.025_gene <- df_list_bambu$bambu_0.025_gene
bambu_0.05_gene <- df_list_bambu$bambu_0.05_gene
bambu_0.1_gene <- df_list_bambu$bambu_0.1_gene
bambu_0.2_gene <- df_list_bambu$bambu_0.2_gene
salmon_0.025_gene <- df_list_salmon$salmon_0.025_gene
salmon_0.05_gene <- df_list_salmon$salmon_0.05_gene
salmon_0.1_gene <- df_list_salmon$salmon_0.1_gene
salmon_0.2_gene <- df_list_salmon$salmon_0.2_gene
meta_samples <- df_list_meta$metadataCreate DGEList objects and filter expressed genes
0.025
y_salmon_0.025 <- DGEList(counts = salmon_0.025_gene, group = day)
y_bambu_0.025 <- DGEList(counts = bambu_0.025_gene, group = day)
expressed_salmon <- filterByExpr(y_salmon_0.025)
expressed_bambu <- filterByExpr(y_bambu_0.025)
not_expressed <-
intersect(names(expressed_salmon[!expressed_salmon]), names(expressed_bambu[!expressed_bambu]))
if (length(not_expressed) > 0) {
y_salmon_0.025 <-
y_salmon_0.025[-which(rownames(salmon_0.025_gene) %in% not_expressed), , keep.lib.sizes = FALSE]
y_bambu_0.025 <-
y_bambu_0.025[-which(rownames(bambu_0.025_gene) %in% not_expressed), , keep.lib.sizes = FALSE]
}0.05
y_salmon_0.05 <- DGEList(counts = salmon_0.05_gene, group = day)
y_bambu_0.05 <- DGEList(counts = bambu_0.05_gene, group = day)
expressed_salmon <- filterByExpr(y_salmon_0.05)
expressed_bambu <- filterByExpr(y_bambu_0.05)
not_expressed <-
intersect(names(expressed_salmon[!expressed_salmon]), names(expressed_bambu[!expressed_bambu]))
if (length(not_expressed) > 0) {
y_salmon_0.05 <-
y_salmon_0.05[-which(rownames(salmon_0.05_gene) %in% not_expressed), , keep.lib.sizes = FALSE]
y_bambu_0.05 <-
y_bambu_0.05[-which(rownames(bambu_0.05_gene) %in% not_expressed), , keep.lib.sizes = FALSE]
}0.1
y_salmon_0.1 <- DGEList(counts = salmon_0.1_gene, group = day)
y_bambu_0.1 <- DGEList(counts = bambu_0.1_gene, group = day)
expressed_salmon <- filterByExpr(y_salmon_0.1)
expressed_bambu <- filterByExpr(y_bambu_0.1)
not_expressed <-
intersect(names(expressed_salmon[!expressed_salmon]), names(expressed_bambu[!expressed_bambu]))
if (length(not_expressed) > 0) {
y_salmon_0.1 <-
y_salmon_0.1[-which(rownames(salmon_0.1_gene) %in% not_expressed), , keep.lib.sizes = FALSE]
y_bambu_0.1 <-
y_bambu_0.1[-which(rownames(bambu_0.1_gene) %in% not_expressed), , keep.lib.sizes = FALSE]
}0.2
y_salmon_0.2 <- DGEList(counts = salmon_0.2_gene, group = day)
y_bambu_0.2 <- DGEList(counts = bambu_0.2_gene, group = day)
expressed_salmon <- filterByExpr(y_salmon_0.2)
expressed_bambu <- filterByExpr(y_bambu_0.2)
not_expressed <-
intersect(names(expressed_salmon[!expressed_salmon]), names(expressed_bambu[!expressed_bambu]))
if (length(not_expressed) > 0) {
y_salmon_0.2 <-
y_salmon_0.2[-which(rownames(salmon_0.2_gene) %in% not_expressed), , keep.lib.sizes = FALSE]
y_bambu_0.2 <-
y_bambu_0.2[-which(rownames(bambu_0.2_gene) %in% not_expressed), , keep.lib.sizes = FALSE]
}Differential gene expression analysis
(Intercept) day1 day2 day3 day4 day5
1 1 0 0 0 0 0
2 1 0 0 0 0 0
3 1 0 0 0 0 0
4 1 1 0 0 0 0
5 1 0 1 0 0 0
6 1 0 0 1 0 0
7 1 0 0 1 0 0
8 1 0 0 0 1 0
9 1 0 0 0 0 1
10 1 0 0 0 0 1
11 1 0 0 0 0 1
attr(,"assign")
[1] 0 1 1 1 1 1
attr(,"contrasts")
attr(,"contrasts")$day
[1] "contr.treatment"
0.025
# Salmon 0.025
y_salmon_0.025 <- calcNormFactors(y_salmon_0.025)
## estimate dispersion
y_salmon_0.025 <- estimateDisp(y_salmon_0.025, design)
## fit the negative binomial model
fit <- glmFit(y_salmon_0.025, design)
## conduct genewise statistical tests for a given coefficient contrast (coef = row of design matrix)
lrtS <- glmLRT(fit, coef = 2:6)
#topTags(lrt)
## multiple testing correction
decide_dif_s0.025 <-
decideTests.DGELRT(lrtS,
adjust.method = "BH",
p.value = 0.01)
summary(decide_dif_s0.025) day5-day4-day3-day2-day1
NotSig 17374
Sig 7343
# Bambu 0.025
y_bambu_0.025 <- calcNormFactors(y_bambu_0.025)
## estimate dispersion
y_bambu_0.025 <- estimateDisp(y_bambu_0.025, design)
## fit the negative binomial model
fit <- glmFit(y_bambu_0.025, design)
## conduct genewise statistical tests for a given coefficient contrast (coef = row of design matrix)
lrtB <- glmLRT(fit, coef = 2:6)
#topTags(lrt)
## multiple testing correction
decide_dif_b0.025 <-
decideTests.DGELRT(lrtB,
adjust.method = "BH",
p.value = 0.01)
summary(decide_dif_b0.025) day5-day4-day3-day2-day1
NotSig 16879
Sig 7838
# overlay results of both technologies
de_genes_0.025 <- as.data.frame(cbind(decide_dif_s0.025, decide_dif_b0.025))
colnames(de_genes_0.025) <- c("Salmon_0.025", "Bambu_0.025")
upset(de_genes_0.025, sets = colnames(de_genes_0.025))# comparison of padj and logfc
padjS <- p.adjust(lrtS$table$PValue, method = "BH")
padjB <- p.adjust(lrtB$table$PValue, method = "BH")
padj <- data.frame(Salmon = padjS, Bambu = padjB)
logfcS <- lrtS$table$logFC.day5
logfcB <- lrtB$table$logFC.day5
logfc <- data.frame(Salmon = logfcS, Bambu = logfcB)
ggplot(data = padj, mapping = aes(x = -log10(Salmon), y = -log10(Bambu))) +
geom_point() +
geom_abline(intercept = 0, slope = 1, color = "blue") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5)) +
scale_color_viridis() +
labs(title = "Comparison of P-values (NDR=0.025)", x = "-log10 P-value (Salmon)", y = "-log10 P-value (Bambu)")ggplot(data = logfc, mapping = aes(x = Salmon, y = Bambu)) +
geom_point() +
geom_abline(intercept = 0, slope = 1, color = "blue") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5)) +
scale_color_viridis() +
labs(title = "Comparison of LogFC (NDR=0.025)", x = "LogFC (Salmon)", y = "LogFC (Bambu)")# time point comparison
## Salmon
fit <- glmFit(y_salmon_0.025, design)
lrt <- glmLRT(fit, coef = 6)
decide_dif_s0.025_tp <-
decideTests.DGELRT(lrt,
adjust.method = "BH",
p.value = 0.01,
lfc = log2(2))
summary(decide_dif_s0.025_tp) day5
Down 2377
NotSig 18283
Up 4057
ggplot(
data = as.data.frame(lrt$table),
mapping = aes(
x = logFC,
y = -log10(p.adjust(PValue, method = "BH")),
color = as.factor(decide_dif_s0.025_tp)
)
) +
geom_point() +
geom_vline(xintercept = log2(2), linetype = "dashed") +
geom_vline(xintercept = -log2(2), linetype = "dashed") +
geom_hline(yintercept = -log10(0.01), linetype = "dashed") +
labs(
title = "Differential gene expression Salmon 0.025 Day5-Day0",
x = "LogFC",
y = "-10log(p-value)",
color = "DE"
) +
scale_color_viridis_d() +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))## Bambu
fit <- glmFit(y_bambu_0.025, design)
lrt <- glmLRT(fit, coef = 6)
decide_dif_b0.025_tp <-
decideTests.DGELRT(lrt,
adjust.method = "BH",
p.value = 0.01,
lfc = log2(2))
summary(decide_dif_b0.025_tp) day5
Down 2014
NotSig 19559
Up 3144
ggplot(
data = as.data.frame(lrt$table),
mapping = aes(
x = logFC,
y = -log10(p.adjust(PValue, method = "BH")),
color = as.factor(decide_dif_b0.025_tp)
)
) +
geom_point() +
geom_vline(xintercept = log2(2), linetype = "dashed") +
geom_vline(xintercept = -log2(2), linetype = "dashed") +
geom_hline(yintercept = -log10(0.01), linetype = "dashed") +
labs(
title = "Differential gene expression Bambu 0.025 Day5-Day0",
x = "LogFC",
y = "-10log(p-value)",
color = "DE"
) +
scale_color_viridis_d() +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))0.05
# Salmon 0.05
y_salmon_0.05 <- calcNormFactors(y_salmon_0.05)
## estimate dispersion
y_salmon_0.05 <- estimateDisp(y_salmon_0.05, design)
## fit the negative binomial model
fit <- glmFit(y_salmon_0.05, design)
## conduct genewise statistical tests for a given coefficient contrast (coef = row of design matrix)
lrtS <- glmLRT(fit, coef = 2:6)
#topTags(lrt)
## multiple testing correction
decide_dif_s0.05 <-
decideTests.DGELRT(lrtS,
adjust.method = "BH",
p.value = 0.01)
summary(decide_dif_s0.05) day5-day4-day3-day2-day1
NotSig 17444
Sig 7367
# Bambu 0.05
y_bambu_0.05 <- calcNormFactors(y_bambu_0.05)
## estimate dispersion
y_bambu_0.05 <- estimateDisp(y_bambu_0.05, design)
## fit the negative binomial model
fit <- glmFit(y_bambu_0.05, design)
## conduct genewise statistical tests for a given coefficient contrast (coef = row of design matrix)
lrtB <- glmLRT(fit, coef = 2:6)
#topTags(lrt)
## multiple testing correction
decide_dif_b0.05 <-
decideTests.DGELRT(lrtB,
adjust.method = "BH",
p.value = 0.01)
summary(decide_dif_b0.05) day5-day4-day3-day2-day1
NotSig 16927
Sig 7884
# overlay results of both technologies
de_genes_0.05 <- as.data.frame(cbind(decide_dif_s0.05, decide_dif_b0.05))
colnames(de_genes_0.05) <- c("Salmon_0.05", "Bambu_0.05")
upset(de_genes_0.05, sets = colnames(de_genes_0.05))# comparison of padj and logfc
padjS <- p.adjust(lrtS$table$PValue, method = "BH")
padjB <- p.adjust(lrtB$table$PValue, method = "BH")
padj <- data.frame(Salmon = padjS, Bambu = padjB)
logfcS <- lrtS$table$logFC.day5
logfcB <- lrtB$table$logFC.day5
logfc <- data.frame(Salmon = logfcS, Bambu = logfcB)
ggplot(data = padj, mapping = aes(x = -log10(Salmon), y = -log10(Bambu))) +
geom_point() +
geom_abline(intercept = 0, slope = 1, color = "blue") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5)) +
scale_color_viridis() +
labs(title = "Comparison of P-values (NDR=0.05)", x = "-log10 P-value (Salmon)", y = "-log10 P-value (Bambu)")ggplot(data = logfc, mapping = aes(x = Salmon, y = Bambu)) +
geom_point() +
geom_abline(intercept = 0, slope = 1, color = "blue") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5)) +
scale_color_viridis() +
labs(title = "Comparison of LogFC (NDR=0.05)", x = "LogFC (Salmon)", y = "LogFC (Bambu)")# time point comparison
## Salmon
fit <- glmFit(y_salmon_0.05, design)
lrt <- glmLRT(fit, coef = 6)
decide_dif_s0.05_tp <-
decideTests.DGELRT(lrt,
adjust.method = "BH",
p.value = 0.01,
lfc = log2(2))
summary(decide_dif_s0.05_tp) day5
Down 2388
NotSig 18352
Up 4071
ggplot(
data = as.data.frame(lrt$table),
mapping = aes(
x = logFC,
y = -log10(p.adjust(PValue, method = "BH")),
color = as.factor(decide_dif_s0.05_tp)
)
) +
geom_point() +
geom_vline(xintercept = log2(2), linetype = "dashed") +
geom_vline(xintercept = -log2(2), linetype = "dashed") +
geom_hline(yintercept = -log10(0.01), linetype = "dashed") +
labs(
title = "Differential gene expression Salmon 0.05 Day5-Day0",
x = "LogFC",
y = "-10log(p-value)",
color = "DE"
) +
scale_color_viridis_d() +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))## Bambu
fit <- glmFit(y_bambu_0.05, design)
lrt <- glmLRT(fit, coef = 6)
decide_dif_b0.05_tp <-
decideTests.DGELRT(lrt,
adjust.method = "BH",
p.value = 0.01,
lfc = log2(2))
summary(decide_dif_b0.05_tp) day5
Down 2031
NotSig 19608
Up 3172
ggplot(
data = as.data.frame(lrt$table),
mapping = aes(
x = logFC,
y = -log10(p.adjust(PValue, method = "BH")),
color = as.factor(decide_dif_b0.05_tp)
)
) +
geom_point() +
geom_vline(xintercept = log2(2), linetype = "dashed") +
geom_vline(xintercept = -log2(2), linetype = "dashed") +
geom_hline(yintercept = -log10(0.01), linetype = "dashed") +
labs(
title = "Differential gene expression Bambu 0.05 Day5-Day0",
x = "LogFC",
y = "-10log(p-value)",
color = "DE"
) +
scale_color_viridis_d() +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))0.1
# Salmon 0.1
y_salmon_0.1 <- calcNormFactors(y_salmon_0.1)
## estimate dispersion
y_salmon_0.1 <- estimateDisp(y_salmon_0.1, design)
## fit the negative binomial model
fit <- glmFit(y_salmon_0.1, design)
## conduct genewise statistical tests for a given coefficient contrast (coef = row of design matrix)
lrtS <- glmLRT(fit, coef = 2:6)
#topTags(lrt)
## multiple testing correction
decide_dif_s0.1 <-
decideTests.DGELRT(lrtS,
adjust.method = "BH",
p.value = 0.01)
summary(decide_dif_s0.1) day5-day4-day3-day2-day1
NotSig 17575
Sig 7529
# Bambu 0.1
y_bambu_0.1 <- calcNormFactors(y_bambu_0.1)
## estimate dispersion
y_bambu_0.1 <- estimateDisp(y_bambu_0.1, design)
## fit the negative binomial model
fit <- glmFit(y_bambu_0.1, design)
## conduct genewise statistical tests for a given coefficient contrast (coef = row of design matrix)
lrtB <- glmLRT(fit, coef = 2:6)
#topTags(lrt)
## multiple testing correction
decide_dif_b0.1 <-
decideTests.DGELRT(lrtB,
adjust.method = "BH",
p.value = 0.01)
summary(decide_dif_b0.1) day5-day4-day3-day2-day1
NotSig 17056
Sig 8048
# overlay results of both technologies
de_genes_0.1 <- as.data.frame(cbind(decide_dif_s0.1, decide_dif_b0.1))
colnames(de_genes_0.1) <- c("Salmon_0.1", "Bambu_0.1")
upset(de_genes_0.1, sets = colnames(de_genes_0.1))# comparison of padj and logfc
padjS <- p.adjust(lrtS$table$PValue, method = "BH")
padjB <- p.adjust(lrtB$table$PValue, method = "BH")
padj <- data.frame(Salmon = padjS, Bambu = padjB)
logfcS <- lrtS$table$logFC.day5
logfcB <- lrtB$table$logFC.day5
logfc <- data.frame(Salmon = logfcS, Bambu = logfcB)
ggplot(data = padj, mapping = aes(x = -log10(Salmon), y = -log10(Bambu))) +
geom_point() +
geom_abline(intercept = 0, slope = 1, color = "blue") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5)) +
scale_color_viridis() +
labs(title = "Comparison of P-values (NDR=0.1)", x = "-log10 P-value (Salmon)", y = "-log10 P-value (Bambu)")ggplot(data = logfc, mapping = aes(x = Salmon, y = Bambu)) +
geom_point() +
geom_abline(intercept = 0, slope = 1, color = "blue") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5)) +
scale_color_viridis() +
labs(title = "Comparison of LogFC (NDR=0.1)", x = "LogFC (Salmon)", y = "LogFC (Bambu)")# time point comparison
## Salmon
fit <- glmFit(y_salmon_0.1, design)
lrt <- glmLRT(fit, coef = 6)
decide_dif_s0.1_tp <-
decideTests.DGELRT(lrt,
adjust.method = "BH",
p.value = 0.01,
lfc = log2(2))
summary(decide_dif_s0.1_tp) day5
Down 2467
NotSig 18544
Up 4093
ggplot(
data = as.data.frame(lrt$table),
mapping = aes(
x = logFC,
y = -log10(p.adjust(PValue, method = "BH")),
color = as.factor(decide_dif_s0.1_tp)
)
) +
geom_point() +
geom_vline(xintercept = log2(2), linetype = "dashed") +
geom_vline(xintercept = -log2(2), linetype = "dashed") +
geom_hline(yintercept = -log10(0.01), linetype = "dashed") +
labs(
title = "Differential gene expression Salmon 0.1 Day5-Day0",
x = "LogFC",
y = "-10log(p-value)",
color = "DE"
) +
scale_color_viridis_d() +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))## Bambu
fit <- glmFit(y_bambu_0.1, design)
lrt <- glmLRT(fit, coef = 6)
decide_dif_b0.1_tp <-
decideTests.DGELRT(lrt,
adjust.method = "BH",
p.value = 0.01,
lfc = log2(2))
summary(decide_dif_b0.1_tp) day5
Down 2093
NotSig 19746
Up 3265
ggplot(
data = as.data.frame(lrt$table),
mapping = aes(
x = logFC,
y = -log10(p.adjust(PValue, method = "BH")),
color = as.factor(decide_dif_b0.1_tp)
)
) +
geom_point() +
geom_vline(xintercept = log2(2), linetype = "dashed") +
geom_vline(xintercept = -log2(2), linetype = "dashed") +
geom_hline(yintercept = -log10(0.01), linetype = "dashed") +
labs(
title = "Differential gene expression Bambu 0.1 Day5-Day0",
x = "LogFC",
y = "-10log(p-value)",
color = "DE"
) +
scale_color_viridis_d() +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))0.2
# Salmon 0.2
y_salmon_0.2 <- calcNormFactors(y_salmon_0.2)
## estimate dispersion
y_salmon_0.2 <- estimateDisp(y_salmon_0.2, design)
## fit the negative binomial model
fit <- glmFit(y_salmon_0.2, design)
## conduct genewise statistical tests for a given coefficient contrast (coef = row of design matrix)
lrtS <- glmLRT(fit, coef = 2:6)
## save lrt for pathway analysis
lrt_pathway <- lrtS
#topTags(lrt)
## multiple testing correction
decide_dif_s0.2 <-
decideTests.DGELRT(lrtS,
adjust.method = "BH",
p.value = 0.01)
summary(decide_dif_s0.2) day5-day4-day3-day2-day1
NotSig 18289
Sig 7723
# Bambu 0.2
y_bambu_0.2 <- calcNormFactors(y_bambu_0.2)
## estimate dispersion
y_bambu_0.2 <- estimateDisp(y_bambu_0.2, design)
## fit the negative binomial model
fit <- glmFit(y_bambu_0.2, design)
## conduct genewise statistical tests for a given coefficient contrast (coef = row of design matrix)
lrtB <- glmLRT(fit, coef = 2:6)
#topTags(lrt)
## multiple testing correction
decide_dif_b0.2 <-
decideTests.DGELRT(lrtB,
adjust.method = "BH",
p.value = 0.01)
summary(decide_dif_b0.2) day5-day4-day3-day2-day1
NotSig 17667
Sig 8345
# overlay results of both technologies
de_genes_0.2 <- as.data.frame(cbind(decide_dif_s0.2, decide_dif_b0.2))
colnames(de_genes_0.2) <- c("Salmon_0.2", "Bambu_0.2")
upset(de_genes_0.2, sets = colnames(de_genes_0.2))# comparison of padj and logfc
padjS <- p.adjust(lrtS$table$PValue, method = "BH")
padjB <- p.adjust(lrtB$table$PValue, method = "BH")
padj <- data.frame(Salmon = padjS, Bambu = padjB)
logfcS <- lrtS$table$logFC.day5
logfcB <- lrtB$table$logFC.day5
logfc <- data.frame(Salmon = logfcS, Bambu = logfcB)
ggplot(data = padj, mapping = aes(x = -log10(Salmon), y = -log10(Bambu))) +
geom_point() +
geom_abline(intercept = 0, slope = 1, color = "blue") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5)) +
scale_color_viridis() +
labs(title = "Comparison of P-values (NDR=0.2)", x = "-log10 P-value (Salmon)", y = "-log10 P-value (Bambu)")ggplot(data = logfc, mapping = aes(x = Salmon, y = Bambu)) +
geom_point() +
geom_abline(intercept = 0, slope = 1, color = "blue") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5)) +
scale_color_viridis() +
labs(title = "Comparison of LogFC (NDR=0.2)", x = "LogFC (Salmon)", y = "LogFC (Bambu)")# time point comparison
## Salmon
fit <- glmFit(y_salmon_0.2, design)
lrt <- glmLRT(fit, coef = 6)
decide_dif_s0.2_tp <-
decideTests.DGELRT(lrt,
adjust.method = "BH",
p.value = 0.01,
lfc = log2(2))
summary(decide_dif_s0.2_tp) day5
Down 2556
NotSig 19230
Up 4226
ggplot(
data = as.data.frame(lrt$table),
mapping = aes(
x = logFC,
y = -log10(p.adjust(PValue, method = "BH")),
color = as.factor(decide_dif_s0.2_tp)
)
) +
geom_point() +
geom_vline(xintercept = log2(2), linetype = "dashed") +
geom_vline(xintercept = -log2(2), linetype = "dashed") +
geom_hline(yintercept = -log10(0.01), linetype = "dashed") +
labs(
title = "Differential gene expression Salmon 0.2 Day5-Day0",
x = "LogFC",
y = "-10log(p-value)",
color = "DE"
) +
scale_color_viridis_d() +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))## Bambu
fit <- glmFit(y_bambu_0.2, design)
lrt <- glmLRT(fit, coef = 6)
decide_dif_b0.2_tp <-
decideTests.DGELRT(lrt,
adjust.method = "BH",
p.value = 0.01,
lfc = log2(2))
summary(decide_dif_b0.2_tp) day5
Down 2193
NotSig 20389
Up 3430
ggplot(
data = as.data.frame(lrt$table),
mapping = aes(
x = logFC,
y = -log10(p.adjust(PValue, method = "BH")),
color = as.factor(decide_dif_b0.2_tp)
)
) +
geom_point() +
geom_vline(xintercept = log2(2), linetype = "dashed") +
geom_vline(xintercept = -log2(2), linetype = "dashed") +
geom_hline(yintercept = -log10(0.01), linetype = "dashed") +
labs(
title = "Differential gene expression Bambu 0.2 Day5-Day0",
x = "LogFC",
y = "-10log(p-value)",
color = "DE"
) +
scale_color_viridis_d() +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))Grouping of DE genes
avg_expr <- as.data.frame(sapply(sort(unique(meta_samples$time))[c(1,4,6)], function(time) {
rowMeans(salmon_0.2_gene[, which(meta_samples$time == time)])
}))
avg_expr$V4 <- unlist(as.vector(salmon_0.2_gene[, which(meta_samples$time == 1)]))
avg_expr$V5 <- unlist(as.vector(salmon_0.2_gene[, which(meta_samples$time == 2)]))
avg_expr$V6 <- unlist(as.vector(salmon_0.2_gene[, which(meta_samples$time == 4)]))
avg_expr <- avg_expr[, c(1,4,5,2,6,3)]
colnames(avg_expr) <- sort(unique(meta_samples$time))corr_DEG <- cor(t(avg_expr[DEG,]), method = "spearman")
hcl_DEG <- hclust(as.dist(1 - corr_DEG), method = "complete")
plot(hcl_DEG, label = FALSE)cl_DEG <- cutree(hcl_DEG, k = 4)
heatmap.2(corr_DEG, Rowv = as.dendrogram(hcl_DEG), Colv = as.dendrogram(hcl_DEG),
trace = "none", scale = "none", labRow = NA, labCol = NA, col = viridis,
ColSideColors = rainbow(15)[cl_DEG])avg_expr_DEG_list <- tapply(names(cl_DEG), cl_DEG, function(x) avg_expr[x,])
scaled_expr_DEG_list <- lapply(avg_expr_DEG_list, function(x) t(scale(t(x))))
layout(matrix(1:4, nrow = 2, byrow = T))
for(cl in 1:4)
boxplot(scaled_expr_DEG_list[[cl]],
main = paste0(cl, " (", nrow(scaled_expr_DEG_list[[cl]]), ")"))Function/Pathway analysis
DAVID (frequency-based)
get_ensemble_id <- function(input_string) {
ensemble_id_version <- unlist(strsplit(input_string, "\\."))
return(ensemble_id_version[1])
}
input <- names(which(cl_DEG == 1))
names_cluster1 <- unname(sapply(input, get_ensemble_id))
input <- names(which(cl_DEG == 2))
names_cluster2 <- unname(sapply(input, get_ensemble_id))
input <- names(which(cl_DEG == 3))
names_cluster3 <- unname(sapply(input, get_ensemble_id))
input <- names(which(cl_DEG == 4))
names_cluster4 <- unname(sapply(input, get_ensemble_id))
input <- rownames(y_salmon_0.025)
background <- unname(sapply(input, get_ensemble_id))write.table(
names_cluster1[-which(startsWith(names_cluster1, "Bambu"))],
file = "../genes_C1.txt",
quote = F,
row.names = F,
col.names = F
)
write.table(
names_cluster2,
file = "../genes_C2.txt",
quote = F,
row.names = F,
col.names = F
)
write.table(
names_cluster3,
file = "../genes_C3.txt",
quote = F,
row.names = F,
col.names = F
)
write.table(
names_cluster4,
file = "../genes_C4.txt",
quote = F,
row.names = F,
col.names = F
)
write.table(
background[-which(startsWith(background, "Bambu"))],
file = "../genes_expressed.txt",
quote = F,
row.names = F,
col.names = F
)GSEA (ranked-based)
genesets <- msigdbr(species = "Homo sapiens", category = "C8")
genesets_list <- tapply(genesets$ensembl_gene, genesets$gs_name, list)
fgsea_kegg <- fgsea(
pathways = genesets_list,
stats = ranked_list,
scoreType = "pos",
minSize = 15,
maxSize = 500
)Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (26.66% of the list).
The order of those tied genes will be arbitrary, which may produce unexpected results.
pathway pval padj
1: FAN_EMBRYONIC_CTX_NSC_1 8.929877e-04 0.0302623602
2: DESCARTES_FETAL_LUNG_VASCULAR_ENDOTHELIAL_CELLS 4.349771e-05 0.0037905144
3: TRAVAGLINI_LUNG_CLUB_CELL 1.172221e-06 0.0003114217
4: BUSSLINGER_GASTRIC_LYZ_POSITIVE_CELLS 1.829102e-05 0.0018595867
5: RUBENSTEIN_SKELETAL_MUSCLE_B_CELLS 3.240715e-07 0.0001976836
6: TRAVAGLINI_LUNG_CD4_NAIVE_T_CELL 3.629272e-06 0.0005534640
7: TRAVAGLINI_LUNG_CD8_MEMORY_EFFECTOR_T_CELL 7.547702e-03 0.1587619992
8: FAN_OVARY_CL0_XBP1_SELK_HIGH_STROMAL_CELL 4.597052e-06 0.0005608403
9: AIZARANI_LIVER_C12_NK_NKT_CELLS_4 5.316876e-03 0.1201220058
10: RUBENSTEIN_SKELETAL_MUSCLE_T_CELLS 1.531582e-06 0.0003114217
log2err ES NES size
1: 0.4772708 0.7915633 1.453507 17
2: 0.5573322 0.7044832 1.378827 46
3: 0.6435518 0.6366332 1.286296 106
4: 0.5756103 0.6283243 1.266456 98
5: 0.6749629 0.6118143 1.250337 163
6: 0.6272567 0.6153934 1.249632 128
7: 0.4070179 0.6579373 1.241656 26
8: 0.6105269 0.6057760 1.234454 143
9: 0.4070179 0.6299482 1.230173 44
10: 0.6435518 0.5981993 1.225006 177